library(plyr)
library(tidyverse)
library(here)
library(geojsonR)
library(janitor)
library(knitr)
library(lubridate)
library(mapview)
library(gbfs)
library(sf)
library(tmap)
library(tidycensus)
library(dplyr)
library(conflicted)
library(plotly)
# Pin here() to the here package; it builds every project-relative data path in this notebook.
conflicts_prefer(here::here)
[conflicted] Removing existing preference.[conflicted] Will prefer here::here over any other package.
# Both plyr and dplyr are attached; use dplyr's rename() wherever rename() is called unqualified.
conflicts_prefer(dplyr::rename)
[conflicted] Removing existing preference.[conflicted] Will prefer dplyr::rename over any other package.
# Unqualified filter() calls below are meant to be dplyr's row filter.
conflicts_prefer(dplyr::filter)
[conflicted] Removing existing preference.[conflicted] Will prefer dplyr::filter over any other package.
# Both plyr and dplyr are attached; use dplyr's mutate() wherever mutate() is called unqualified.
conflicts_prefer(dplyr::mutate)
[conflicted] Removing existing preference.[conflicted] Will prefer dplyr::mutate over any other package.
We use three datasets: Metro Station Entrances to map the locations of the metro stations, boarding data to show how many people use each metro station, and Capital Bikeshare trip data to show the number of people riding bikes.
All data is from the month of September because there are no major holidays, the weather is still decent enough for people to ride bikes, and the number of tourists and recreational bike riders is reduced.
For the purposes of this project, we focus on commuters and aim to identify where additional bikeshare locations would better serve the number of commuters.
# Load the raw inputs from data_raw/ (paths are resolved from the project root
# by here()).

# Metro station entrances, parsed with geojsonR.
metro <- FROM_GeoJson(here('data_raw', 'Metro_Station_Entrances_in_DC.geojson'))

# Metro boardings and station locations.
# fileEncoding = "UTF-8-BOM" discards a leading byte-order mark when the file
# has one, so the first column keeps its real header ("Station", "X") instead
# of a mangled name such as "ï..Station" / "ï..X". Files without a BOM are read
# unchanged.
metroRiders <- read.csv(
  here('data_raw', 'Boardings by Route Table_Full Data_data.csv'),
  fileEncoding = "UTF-8-BOM"
)
metroLoc <- read.csv(
  here('data_raw', 'Metro_Stations_Regional.csv'),
  fileEncoding = "UTF-8-BOM"
)

# Capital Bikeshare trips (file 202309); readr guesses the column types.
sept_raw <- read_csv(here('data_raw', '202309-capitalbikeshare-tripdata.csv'))
Rows: 450090 Columns: 13── Column specification ───────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (5): ride_id, rideable_type, start_station_name, end_station_name, member_casual
dbl (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, end_lng
dttm (2): started_at, ended_at
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the DC health-planning neighborhood polygons as an sf object and
# normalise the column names with janitor.
neigh <- clean_names(
  st_read(here("data_raw", "DC_Health_Planning_Neighborhoods.geojson"))
)
Reading layer `DC_Health_Planning_Neighborhoods' from data source
`C:\DATASCIENCE\basta-dataforskare\data_raw\DC_Health_Planning_Neighborhoods.geojson' using driver `GeoJSON'
Simple feature collection with 51 features and 8 fields
Geometry type: POLYGON
Dimension: XY
Bounding box: xmin: -77.11976 ymin: 38.79165 xmax: -76.9094 ymax: 38.99556
Geodetic CRS: WGS 84
For the metro data, we keep only weekday entries (dropping weekends), attach each station's address and coordinates by matching on station name, and combine repeated stations into a single row with their entries summed.
# Kept for reference: rename the first column back to "X" on machines where
# its header is read with stray leading characters.
#metroLoc = metroLoc |>
#rename("X" = "ï..X")

# Station name, address and coordinates, using the column names expected
# downstream (Station / Lon / Lat).
metroAddy <- metroLoc |>
  select(Station = NAME, ADDRESS, Lon = X, Lat = Y)

# Drop the boarding-table columns that are not needed, so that the numeric
# column-wise sum below only aggregates what is left.
metroRiders <- metroRiders[
  ,
  !(names(metroRiders) %in% c(
    "Time.Period", "Day.of.Week", "Holiday",
    "Month", "Year", "Avg.Daily.Entries.Rounded"
  )),
  drop = FALSE
]

# Kept for reference: same header repair for the boardings table.
#metroRiders = metroRiders |>
#rename("Station" = "ï..Station")

# Weekday service only, then one row per station with every remaining numeric
# column summed.
metroR1 <- ddply(
  filter(metroRiders, Servicetype == "Weekday"),
  .variables = "Station",
  .fun = numcolwise(sum)
)

# Attach address/coordinates; merge() keeps only stations whose name appears
# in both tables.
METRO <- merge(metroR1, metroAddy, by = "Station")
glimpse(METRO)
Rows: 86
Columns: 5
$ Station <chr> "Anacostia", "Arlington Cemetery", "Ashburn", "Ballston-MU", "Benning Road", "Bethesda", "Bra…
$ Entries <int> 62365, 16111, 25199, 114875, 31518, 102933, 45638, 47644, 85141, 23630, 101604, 10032, 57042,…
$ ADDRESS <chr> "1101 HOWARD ROAD SE, WASHINGTON, DC", "1000 NORTH MEMORIAL DRIVE, ARLINGTON, VA", "43625-A C…
$ Lon <dbl> -76.99537, -77.06281, -77.49154, -77.11317, -76.93837, -77.09413, -77.05367, -76.91147, -76.9…
$ Lat <dbl> 38.86297, 38.88469, 39.00529, 38.88219, 38.89098, 38.98440, 38.81415, 38.82645, 38.93322, 38.…
Cleaning bike data
# Keep each trip's start timestamp and start coordinates, drop rows with any
# missing value, and reduce the timestamp to a calendar date.
bikeR1 <- sept_raw %>%
  select(start_date = started_at, start_lat, start_lng) %>%
  na.omit() %>%
  mutate(start_date = as.Date(start_date))

# Turn the start coordinates into point geometries in EPSG:4326.
bikeR2 <- st_as_sf(bikeR1, coords = c("start_lng", "start_lat"), crs = 4326)

# Print the CRS of the neighborhood geometries to compare with the CRS used above.
st_crs(neigh$geometry[1])
Coordinate Reference System:
User input: WGS 84
wkt:
GEOGCRS["WGS 84",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4326]]
# Spatially join each trip start point to the neighborhood polygons, carrying
# the neighborhood attributes (including `code`) onto the trip rows.
bikeR3 <- st_join(bikeR2, neigh)
# code for possible future mapping
#df1_s_sf = df1_s %>% st_as_sf(coords =c("start_lng", "start_lat"), crs = 4326)
# Metro stations as point geometries in EPSG:4326.
MetroMap <- METRO %>%
  st_as_sf(coords = c("Lon", "Lat"), crs = 4326)

# Join stations to neighborhoods, drop every row that has an NA in any column
# (which removes stations that matched no neighborhood polygon), then sum the
# numeric columns per neighborhood code.
MetroMap2 <- ddply(
  na.omit(st_join(MetroMap, neigh)),
  .variables = "code",
  .fun = numcolwise(sum)
)
# Neighborhood code plus polygon only.
neigh1 <- neigh %>% select(code, geometry)

# Trip date and neighborhood code as a plain (non-spatial) table.
bikeR4 <- bikeR3 %>%
  st_drop_geometry() %>%
  select(start_date, code)

# Attach every trip to its neighborhood polygon and keep weekday trips only.
# The weekday test is computed from the date (Monday = 1 ... Sunday = 7)
# instead of listing each September 2023 weekend date by hand, so it stays
# correct if the input covers other dates. Rows whose start_date is NA
# (neighborhoods with no trips) evaluate to NA and are dropped by filter(),
# exactly as the previous `start_date != ...` comparisons did.
bikeR5 <- neigh1 %>%
  full_join(bikeR4) %>%
  filter(lubridate::wday(start_date, week_start = 1) <= 5)
Joining with `by = join_by(code)`
#plot(neigh)
# Count weekday trips per neighborhood code, re-attach the neighborhood
# geometry via the trip table, and collapse to one complete row per code.
bikeR6 <- data.frame(table(code = bikeR5$code)) %>%
  full_join(bikeR5) %>%
  select(code, Freq, geometry) %>%
  distinct() %>%
  na.omit()
Joining with `by = join_by(code)`
# Wide comparison table: one row per neighborhood code with a bike count and
# a metro count side by side.
bikeR7 <- select(bikeR6, code, bike_freq = Freq)
MetroMap3 <- select(MetroMap2, metro_freq = Entries, code)

# Neighborhoods with bike trips but no metro entries get a metro count of 0.
metro_bike_df <- full_join(bikeR7, MetroMap3) %>%
  replace_na(list(metro_freq = 0))
Joining with `by = join_by(code)`
#bikeR7 = bikeR5 %>% count(code, start_date)
#plot(bikeR6)

# Long comparison table: one row per (neighborhood code, transport mode).
bikeR8 <- bikeR6 %>%
  select(code, freq = Freq) %>%
  mutate(transport = 'bike')
MetroMap4 <- MetroMap2 %>%
  select(freq = Entries, code) %>%
  mutate(transport = 'metro')

# Explicit zero-entry metro rows for the neighborhood codes listed here, so
# that each of them also appears with a 'metro' bar in the chart.
code <- c(
  "N1", "N10", "N11", "N14", "N15", "N16", "N2", "N20", "N21", "N22",
  "N26", "N27", "N28", "N3", "N32", "N33", "N34", "N36", "N37", "N4",
  "N40", "N41", "N45", "N46", "N47", "N49", "N5", "N50", "N51", "N6", "N8"
)
freq <- rep(0, length(code))
transport <- rep('metro', length(code))
metroExtra <- data.frame(code, freq, transport)
MetroMap4 <- rbind(MetroMap4, metroExtra)

# All three columns are shared, so this join is keyed on code, freq and
# transport together.
metro_bike_df2 <- full_join(bikeR8, MetroMap4)
Joining with `by = join_by(code, freq, transport)`
# Read the metro station entrances as an sf layer with cleaned column names.
# The file is read from data_raw/, the same location used when it is loaded
# with FROM_GeoJson, so the notebook does not depend on a second copy of the
# GeoJSON sitting in the project root.
entrances <- st_read(here("data_raw", "Metro_Station_Entrances_in_DC.geojson")) %>%
  clean_names()
Reading layer `Metro_Station_Entrances_in_DC' from data source
`C:\DATASCIENCE\basta-dataforskare\Metro_Station_Entrances_in_DC.geojson' using driver `GeoJSON'
Simple feature collection with 113 features and 23 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -77.08577 ymin: 38.84465 xmax: -76.93472 ymax: 38.97578
Geodetic CRS: WGS 84
# Sanity check: confirm the layer was read as an sf object (an sf-classed data frame).
class(entrances)
[1] "sf" "data.frame"
# Quick-look plot of the entrances layer; sf draws one panel per attribute and
# caps how many it shows (raise `max.plot` to see all of them).
plot(entrances)
Warning: plotting the first 10 out of 23 attributes; use max.plot = 23 to plot all
# Side-by-side bars per neighborhood code comparing bike and metro counts,
# rendered as an interactive plotly widget.
charts <- ggplot(
  data = metro_bike_df2,
  mapping = aes(x = code, y = freq, fill = transport)
) +
  geom_bar(stat = 'identity', position = 'dodge')
ggplotly(charts)
Based on the comparison of metro entries and bike trips, we recommend that the bikeshare group look into increasing the number of bike stations in the following neighborhoods: N1, N2, N3, N4, N6, N8, N10, N11, N14, N15, N16, N20, N21, N22, N26, N27, N28, N32, N33, N34, N36, N37, N40, N41, N45, N46, N47, N49, N50, N51.